package com.hao.tool.web.util;

import cn.hutool.core.io.FileUtil;
import cn.hutool.http.HtmlUtil;
import com.hao.tool.common.util.HttpClientUtil;
import org.apache.commons.text.StringEscapeUtils;
import org.junit.Test;

import java.io.IOException;
import java.util.HashMap;

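/**
 * Scratch helpers for reducing HTML content to plain text and printing it,
 * together with ad-hoc JUnit entry points that exercise them.
 *
 * @author HaoZ on CMHK.hainan
 * @date 2021/6/1 9:08 AM
 */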
public class HTMLUtils {

    /**
     * Reduces the given HTML content to plain text and writes it to standard output.
     *
     * <p>The content goes through three steps, in this order:
     * <ol>
     *   <li>{@code HtmlUtil.cleanHtmlTag} - strips the HTML tags;</li>
     *   <li>{@code HtmlUtil.unescape} - turns HTML escapes back into characters;</li>
     *   <li>{@code StringEscapeUtils.unescapeJava} - resolves Java-style escape sequences.</li>
     * </ol>
     * Tags are removed before entities are unescaped so that escaped markup in the
     * text is not mistaken for real tags and stripped.
     *
     * @param content the raw HTML text; may be {@code null}, in which case the
     *                literal text {@code null} is printed and no processing is attempted
     */
    public void print(String content) {
        if (content == null) {
            // Guard: the cleaning pipeline below dereferences its input, so a null
            // would otherwise surface as a NullPointerException from library code.
            System.out.println(content);
            return;
        }
        String withoutTags = HtmlUtil.cleanHtmlTag(content);
        String htmlUnescaped = HtmlUtil.unescape(withoutTags);
        String plainText = StringEscapeUtils.unescapeJava(htmlUnescaped);
        System.out.println(plainText);
    }

    /**
     * Reads a local file as UTF-8 and prints its plain-text rendering via {@link #print(String)}.
     *
     * <p>NOTE(review): the path is a hard-coded absolute location on the author's
     * machine, so this only works where that file exists.
     */
    @Test
    public void test9() {
        String filePath = "/Users/haol/Downloads/Untitled-1.txt";
        String content = FileUtil.readUtf8String(filePath);
        print(content);
    }


    /**
     * Fetches a fixed web page with a browser-like User-Agent header and prints its
     * plain-text rendering via {@link #print(String)}.
     *
     * <p>NOTE(review): {@code setChartStr("gb2312")} presumably selects the charset used
     * to decode the response and {@code setLogStatus(true)} presumably enables request
     * logging - confirm against {@code HttpClientUtil}. Requires network access.
     *
     * @throws IOException declared for the HTTP call made through {@code HttpClientUtil}
     */
    @Test
    public void HtmlAll() throws IOException {
        String content = HttpClientUtil.build().setLogStatus(true)
                .setChartStr("gb2312")
                .addHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36")
                .sendGet("https://wenku.baidu.com/view/0a9aa10c32687e21af45b307e87101f69f31fb70.html", new HashMap<>());
        print(content);

    }


    /**
     * Runs {@code StringEscapeUtils.unescapeJava} on a string containing a single
     * non-ASCII character and prints the result.
     */
    @Test
    public void test22() {
        String s = "我";
        String ss = StringEscapeUtils.unescapeJava(s);
        System.out.println(ss);
    }
}
